#
# createANESData.R
#
# Creates the set of ANES data, built from the cumulative file, that is used in
#
# Hill, Seth J. and Chris Tausanovitch. "A Disconnect in Representation? Comparison of Trends in Congressional and Public Polarization."
#

# Configuration flag: when TRUE, the Knowledge Network internet respondents
# to the 2012 survey are dropped further down in this script.
# Spelled as TRUE (not T) because T is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved constant.
drop.kn <- TRUE

# Variables used identified by authors from cumulative codebook.
# The first CSV column supplies the row names; as.is = TRUE keeps character
# columns as character instead of converting them to factors.
vars.needed <- read.csv("ANES_Cum_Vars_ToUse.csv", as.is = TRUE, row.names = 1)
print(vars.needed)

# Using Sept 25, 2014 release of cumulative file from http://www.electionstudies.org/studypages/download/datacenter_all_datasets.php.
library(foreign)
# Announce the (slow) read and flush so the message shows up immediately
# in buffered consoles.
cat("Reading in cumulative data...\n")
flush.console()

# Read the Stata cumulative file. Underscores in variable names are converted
# to periods, and labelled values are kept as their underlying codes rather
# than being turned into factors. Argument names are written out in full
# (convert.factors, not the partially matched convert.factor) and the
# reserved constants TRUE/FALSE are used instead of the reassignable T/F.
full.dat <- read.dta(
  "anes_timeseries_cdf.dta",
  convert.underscore = TRUE,
  convert.factors = FALSE
)

# No additional items using new cumulative file.
additional.2012.items <- NULL
# Select the survey weight and, when drop.kn is TRUE, remove the 2012 KN
# internet respondents. Results:
#   full.dat$WEIGHT    - the weight chosen for this run
#   full.dat$VCF0009a  - copy of WEIGHT under the old variable name
#   anes.cdf.with.2012 - the final data frame used by the rest of the script
if (drop.kn) {
  # Rows whose vcf0009x weight equals 0 are the ones this script treats as
  # KN internet respondents. A missing weight is not flagged: comparing NA
  # with 0 yields NA, and an NA in a logical row index would insert all-NA
  # rows into the result and make the count below NA.
  is.kn <- !is.na(full.dat$vcf0009x) & full.dat$vcf0009x == 0

  # Diagnostic cross-tab of year by zero-weight status. It must be printed
  # explicitly: a non-final expression inside braces is never auto-printed.
  print(with(full.dat, table(VCF0004, vcf0009x == 0)))

  cat("Dropping", sum(is.kn), "2012 KN internet respondents...\n")
  full.dat <- full.dat[!is.kn, ]

  # Weight excluding web sample.
  full.dat[, "WEIGHT"] <- full.dat[, "vcf0009x"]
} else {
  # Weight including web sample.
  # Fixed: the original read from an undefined object `dat`, so this branch
  # always failed with "object 'dat' not found".
  full.dat[, "WEIGHT"] <- full.dat[, "vcf0009z"]
}

# Make weight variable consistent with old naming.
full.dat[, "VCF0009a"] <- full.dat[, "WEIGHT"]
anes.cdf.with.2012 <- full.dat

# Report how many respondents fall in each study year (VCF0004), first as
# raw row counts and then as sums of the VCF0009a weight.
cat("unweighted tabulation of years:\n")
year.counts <- table(anes.cdf.with.2012$VCF0004)
print(year.counts)

cat("weighted tabulation of years:\n")
# The left-hand side of the formula is summed within each year.
weighted.year.counts <- xtabs(VCF0009a ~ VCF0004, data = anes.cdf.with.2012)
print(weighted.year.counts)

# Store the finished data frame together with the (currently NULL) list of
# additional 2012 items in a single .RData file.
save(
  list = c("anes.cdf.with.2012", "additional.2012.items"),
  file = "anes_cdf.RData"
)
